SHEA Spring 2024 Data Visualization Demo

Matthew Ziegler

2024-03-26

Overview

R Introduction

R Introduction

R Introduction

Getting Started

library(dplyr)
library(ggplot2)
library(gt)

Piping Data

data %>% new_data %>% tables

data %>% different_data %>% figures

Loading our Data

dat_cdiff <- read.csv("/Users/mattz/Documents/GitHub/shea24_demo/HAICViz_-_CDI_20240214.csv") %>%
  janitor::clean_names() %>%
  filter(topic =="Case rates (cases per 100,000)", 
         series =="Community-associated"|series=="Healthcare-associated")
head(dat_cdiff) %>%
  gt()
year_name topic view_by grouping series value
2011 Case rates (cases per 100,000) Total Epi Class Community-associated 48.16
2012 Case rates (cases per 100,000) Total Epi Class Community-associated 52.88
2013 Case rates (cases per 100,000) Total Epi Class Community-associated 55.75
2014 Case rates (cases per 100,000) Total Epi Class Community-associated 57.83
2015 Case rates (cases per 100,000) Total Epi Class Community-associated 65.81
2016 Case rates (cases per 100,000) Total Epi Class Community-associated 67.20

Let’s do a simple graph

dat_cdiff_line_plot <- dat_cdiff %>%
  ggplot(aes(x = as.factor(year_name), y = value, group = series)) + 
  geom_line(aes(linetype = series)) +
  labs(title = "Cases by year", y = "CDI cases per 1000 individuals", x = "Year")

Let’s do a simple graph

Let’s do a simple graph

dat_cdiff %>%
  ggplot(aes(x = as.factor(year_name), y = value, fill= series)) + 
  geom_col(position = "dodge") +
  labs(title = "Cases by year", y = "CDI cases per 1000 individuals", x = "Year")

A little more complicated

library(tidyr)
dat_cdiff_cat_plot <- read.csv("/Users/mattz/Documents/GitHub/shea24_demo/HAICViz_-_CDI_20240214.csv") %>%
  janitor::clean_names() %>%
  filter(topic =="Case rates (cases per 100,000)") %>%
  mutate(cat = case_when(
    grepl("HA|CA", series) ==TRUE & grepl("years", series) ==TRUE  ~ "age",
    grepl("Male|Female", series) ==TRUE & grepl("HA|CA", series) ==TRUE  ~ "sex",
    grepl("White|Non-white", series) ==TRUE & grepl("HA|CA", series) ==TRUE ~ "race")) %>%
  filter(!is.na(cat)) %>%
  separate(series, into = c("category","group"), sep =" - ") %>%
  ggplot(aes(x = as.factor(year_name), y = value, 
             group = interaction(group, category),linetype = category, col = group,)) + 
  geom_line(lwd =1) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 75, vjust = 0, hjust=0)) +
  facet_wrap(vars(cat)) +
  labs(title = "C.difficile Infection by Year", y = "Case rates (cases per 100,000)", x = "Year") 

A little more complicated

dat_cdiff_cat_plot

Highlighting and annotating

library(gghighlight)
dat_mdrgn <- read.csv("/Users/mattz/Documents/GitHub/shea24_demo/HAICViz_-_MuGSI_20240330.csv") %>%
  janitor::clean_names() %>%
  mutate(keep = case_when(
    viewby =="Organism" & series != "All cases"  ~ 1,
    organism =="CRAB" & viewby == "All cases" & topic =="Case Rates" ~ 1,
    TRUE ~ 0
  )) %>%
  filter(keep ==1) %>%
  mutate(series = ifelse(organism =="CRAB","Acinetobacter baumanii", series))
dat_mdrgn_plot <- dat_mdrgn %>%  
  ggplot(aes(x = as.factor(year_name), y = value, group = series)) + 
  geom_line(aes(linetype = series)) +
  theme(axis.text.x = element_text(angle = 75, vjust = 0, hjust=0)) +
  labs(title = "Cases by year - Carbapenem-Resistant GNB",
       y = "Cases per 1000 individuals", x = "Year") +
  gghighlight(series =="Acinetobacter baumanii") +
  theme_minimal()

Animated figures

library(magick)
library(gganimate)
library(maps)
library(tidyr)
respi <- read.csv("/Users/mattz/Documents/GitHub/shea24_demo/Outpatient_Respiratory_Illness_Activity_Map_20240401.csv") %>%
  janitor::clean_names() %>%
  mutate(region = tolower(state)) %>%
  separate(activity_level, into=c(NA, "level"), sep = " ") %>%
  mutate(level = as.numeric(level)) %>%
  filter(season =="2022-2023")

states <- map_data("state")
region_dat_respi <- left_join(states, respi, by = "region")
gif_a <- region_dat_respi %>%
ggplot(., aes(long, lat, group = group)) +
  geom_polygon(aes(fill = level),
               colour = alpha("white", 1/2), size = 0.05)  +
  geom_polygon(data = states, colour = "black", fill = NA) +
  scale_fill_gradientn(colours = terrain.colors(6))  +
  theme_void() +
  transition_time(week) +
  labs(title = 'Respiratory Infection Activity 22-23 Season: Week {frame_time}') +
  theme_minimal()

gif_a <- animate(gif_a, width = 700, height = 480)

Animate

gif_b <- region_dat_respi %>%
  #filter(!is.na(value)) %>%
  ggplot(data = ., aes(y = level)) + geom_boxplot() +
  labs(x = "", title = "National Value") +
  theme(axis.text.x = element_blank()) +
  transition_time(week)  
  #enter_fade() + 
  #exit_shrink() +
  #ease_aes('sine-in-out') 

gif_b <- animate(gif_b, width = 600, height = 480)

Animate

a_mgif <- image_read(gif_a)
b_mgif <- image_read(gif_b)

new_gif <- image_append(c(a_mgif[1], b_mgif[1]))
for(i in 1:95){
  combined <- image_append(c(a_mgif[i], b_mgif[i]))
  new_gif <- c(new_gif, combined)
}

Plotting Model Output

library(gtsummary)
library(marginaleffects)

latitude_by_states <- states %>%
  group_by(region) %>%
  summarise(mean_lat = mean(lat))

Plotting Model Output

vaccination <- read.csv("/Users/mattz/Documents/GitHub/shea24_demo/Vaccination_Coverage_among_Health_Care_Personnel_20240401.csv") %>%
  janitor::clean_names() %>%
  mutate(year = as.numeric(substr(season,1,4))) %>%
  mutate(region = tolower(geography)) %>%
  left_join(latitude_by_states, by = "region") %>%
  rename(latitude = mean_lat) %>%
  filter(personnel_type != "All Health Care Personnel")

Plotting Model Output

model <- lm(estimate ~ year + latitude + personnel_type, dat = vaccination)

Plotting Model Output

tbl_regression(model)
Characteristic Beta 95% CI1 p-value
year 1.2 0.96, 1.5 <0.001
latitude 0.34 0.22, 0.47 <0.001
personnel_type
    Adult Students/Trainees and Volunteers
    Employees 3.2 1.7, 4.6 <0.001
    Licensed Independent Practitioners -14 -16, -13 <0.001
1 CI = Confidence Interval

Plotting Model Output

plot_predictions(model, condition = "year") +
  labs(y = "Estimated Proportion: Vaccine Compliance", x= "Year", 
       title = "Estimated Vaccine Compliance by Year") +
  theme_minimal()

Plotting Model Output

What Else?

Conclusions

Resources

Citations